# ---
# Fudenberg, Gao & Liang:
# "How Flexible is that Functional Form? Quantifying the Restrictiveness of Theories"
# 
# Application 3: Microfinance Takeup

# "GenLinearData.R"

# Research assistant: Stephanie Nam
# Date: 09/24/2023

# NOTE: This R script generates the variables to be used subsequently in the linear models

# ---

library(paramtest)
library(dplyr)
library(igraph)
library(sandwich)
library(lmtest)
library(rockchalk)
library(expm)
library(Rsolnp)
library(plotly)

# Divert standard output to the log file for the rest of the script.
# append = FALSE overwrites any existing log; split = FALSE means the output
# is not also echoed to the console.
sink(file = "GenLinearData.log", append = FALSE, split = FALSE)


# Load Village Networks & Find Eigenvector Centralities
# Each CSV in "adj_all" is read as a headerless adjacency matrix and converted
# to an undirected igraph graph. The result is a list with one vector of
# eigenvector centralities per file, in the order returned by list.files().
villages_eig <- list.files(path = "adj_all", pattern = "*.csv",
                           full.names = TRUE) %>%
  lapply(read.csv, header = FALSE) %>%
  lapply(as.matrix) %>%
  # graph_from_adjacency_matrix() is the current name of the deprecated
  # graph.adjacency(); it takes the same arguments.
  lapply(graph_from_adjacency_matrix, mode = "undirected") %>%
  lapply(eigen_centrality, scale = FALSE) %>%
  # Keep only the centrality scores (the `vector` element of the result).
  lapply(function(res) res$vector)

# Load Leader Status for All Villages
# Every file in "HHhasALeader" is read as a whitespace-separated, headerless
# table. For each file, keep the first-column values (V1) of the rows whose
# second column (V2) equals 1.
leaders <- list.files(path = "HHhasALeader", pattern = "*.csv",
                      full.names = TRUE) %>%
  lapply(read.csv, sep = "", header = FALSE) %>%
  lapply(function(status) status$V1[status$V2 == 1])

# Load Nonleader Status for All Villages
# Same files as the leader list, read the same way. For each file, keep the
# first-column values (V1) of the rows whose second column (V2) equals 0.
nonleaders <- list.files(path = "HHhasALeader", pattern = "*.csv",
                         full.names = TRUE) %>%
  lapply(read.csv, sep = "", header = FALSE) %>%
  lapply(function(status) status$V1[status$V2 == 0])

# Load Microfinance Participation Status for All Villages
# One headerless data frame per CSV file found in "MF", in list.files() order.
mf <- list.files(path = "MF", pattern = "*.csv", full.names = TRUE) %>%
  lapply(read.csv, header = FALSE)

# Function for Finding Eigenvector Centrality of Leaders
#
# Arguments:
#   eig  - vector of centrality scores for one village
#   lead - index vector selecting the leader entries of `eig`
# Returns:
#   The mean of the selected scores (NaN when `lead` selects nothing).
findCentrality <- function(eig, lead) {
  leader_scores <- eig[lead]
  mean(leader_scores)
}

# Eigenvector Centrality of Leaders Per Village
# Pairs each village's centrality vector with that village's leader indices.
leader_eig <- mapply(findCentrality, villages_eig, leaders)

# Microfinance Participation Rate of Nonleader Households
# For each village: sum the participation rows selected by the nonleader
# indices and divide by the number of nonleaders.
mf_rate <- mapply(
  function(non, m) {
    takers <- sum(m[non, ])
    takers / length(non)
  },
  non = nonleaders,
  m = mf
)

# Load in the 43 Village Networks
# Each CSV in "adj_all" is read as a headerless adjacency matrix and converted
# to an undirected igraph graph; `villages` holds one graph per file.
villages <- list.files(path = "adj_all", pattern = "*.csv",
                       full.names = TRUE) %>%
  lapply(read.csv, header = FALSE) %>%
  lapply(as.matrix) %>%
  # graph_from_adjacency_matrix() is the current name of the deprecated
  # graph.adjacency(); it takes the same arguments.
  lapply(graph_from_adjacency_matrix, mode = "undirected")


# Average True Take Up Rate
n <- mean(mf_rate) # 0.1848563
# MSE between Naive Model and True Take Up Rate
# The naive model predicts the average rate `n` for every village. mean() ties
# the denominator to the number of villages actually loaded, rather than to a
# hard-coded 43.
nm_error <- mean((n - mf_rate)^2)
# Top-level expression: prints the value to the current output.
nm_error


# Degree and Betweenness of Every Household, Per Village
# `villages` (the list of igraph graphs already built from "adj_all" earlier in
# this script) is reused here instead of re-reading and re-parsing the same CSV
# files a second time.
villages_deg <- lapply(villages, degree)
villages_betweenness <- lapply(villages, betweenness)


# Explore Network Centrality Measures
# avg_eig <- villages_eig %>% lapply(mean) %>% unlist()

# Mean degree of the leader households in each village.
leaders_deg <- mapply(function(d, l) mean(d[l]), d = villages_deg, l = leaders)
# Mean degree over all households; reuses the degrees already computed in
# `villages_deg` instead of recomputing them.
avg_deg <- vapply(villages_deg, mean, numeric(1))
# Mean betweenness of the leader households in each village.
leaders_betweenness <- mapply(function(b, l) mean(b[l]),
                              b = villages_betweenness, l = leaders)
# mean_distance() is the current name of the deprecated average.path.length().
avg_path_length <- vapply(villages, mean_distance, numeric(1))
cluster_coeff <- vapply(villages, transitivity, numeric(1))
# (number of connected components - 1) divided by the number of vertices.
comp_count <- (vapply(villages, count_components, numeric(1)) - 1) /
  vapply(villages, gorder, numeric(1))
# Number of leader households divided by the number of vertices.
leaders_rate <- lengths(leaders) / vapply(villages, gorder, numeric(1))

# Save the entire workspace (every object created above) to an .RData file.
save.image(file = "NetLinearData.RData")

# Remove the most recent output diversion.
sink()

